/******************************************
This program closely follows 02.earnings_merge.sas
(which produces the main dataset), but it additionally flags terminal
runs for the sandwich test detailed in the paper.

quarterly earnings by state
quarterly earnings overall

*****************************************/


/* First, I need to output all the earnings from individual degree observations.
   The output of this table is a pik-degree level-job level file, which can then
   be aggregated across qtime, and then made long. */
/* View earnings_raw: for every PHF earnings record of a pik, emit one row per
   degree record of that same pik, carrying the national (nat_e*) and
   in-state (state_e*) quarterly earnings for quarters &startqtr..&endqtr.
   Assumes both inputs are sorted by pik (required by the BY statement). */
data earnings_raw (keep=pik opeid degcip year_grad qtime_grad 
                    deglevl_code ui_state nat_e: state_e: )
                    /view=earnings_raw;


        
     /* Interleave: within a pik, rows of the first-listed dataset
        (grads_appended) are read before the rows of phf_interleave_b,
        so the hash below is fully loaded before any earnings row arrives. */
     set OUTPUTS.grads_appended (in=in_grads)
         INPUTS.phf_interleave_b (in=in_phf) ;
     by pik ; 
        /* creating a hash table to store all the observations of a pik;
           (obs=0) loads no rows up front -- rows are added one pik at a time */
     if _n_ = 1 then do ; 
        declare hash h_grad(dataset:'OUTPUTS.grads_appended (obs=0)',ordered:'Y');
		rc=h_grad.defineKey('pik','opeid','degcip','year_grad','qtime_grad','deglevl_code','ui_state');
		rc=h_grad.defineData('pik','opeid','degcip','year_grad','qtime_grad','deglevl_code','ui_state');
		rc=h_grad.defineDone();
		declare hiter i_grad('h_grad');
     end;
    
    /* arr_e: source earnings e<qtr> (presumably supplied by the PHF record --
       confirm); nat_e/state_e: derived outputs indexed by quarter number */
    array arr_e(&startqtr.:&endqtr.) e&startqtr.-e&endqtr. ;
    array nat_e(&startqtr.:&endqtr.) nat_e&startqtr.-nat_e&endqtr. ;
    array state_e(&startqtr.:&endqtr.) state_e&startqtr.-state_e&endqtr. ;
    
    /* degree record: remember it for this pik; the return code is ignored,
       so a repeated key is silently skipped */
    if in_grads then do; 
        rc=h_grad.add() ; 
    end;
        /* this is where I cycle through earnings: each iterator step copies
           one stored degree record into the PDV, then one row is output */
    if in_phf then do ;
        rc=i_grad.first() ;
        do while (rc=0) ;
            do ii = &startqtr. to &endqtr. ;
                nat_e(ii) = arr_e(ii) ;
                /* keep earnings only when the degree's ui_state equals the
                   record's state (presumably the job's state -- confirm) */
                state_e(ii)=arr_e(ii)*(ui_state=state) ;
		/*this will be zero otherwise (missing if arr_e(ii) is missing) */
            end;
            output;
            rc=i_grad.next() ;
        end;
     end;
     
     /* now I need to clear the hash table so the next pik starts empty */
     if last.pik then do; 
        rc=h_grad.clear() ;
     end;
run;


                    
/***********************************
    PROC SUMMARY TO AGGREGATE
    OVER ALL JOBS
*************************************/
                   
/* Sum every nat_e* / state_e* column of earnings_raw within each
   pik-degree-state cell. NWAY keeps only the full class-variable
   crossing; rows with a missing class value are excluded (no MISSING option). */
proc summary nway data=earnings_raw;
    class pik opeid degcip year_grad qtime_grad deglevl_code ui_state;
    var nat_e: state_e:;
    /* SUM= with no variable list sums all VAR variables under their own names */
    output out=earn_totals (drop=_TYPE_ _FREQ_) sum=;
run;

/* Re-order earn_totals in place so the reshaping step can use
   BY pik opeid deglevl_code degcip. */
proc sort data=earn_totals out=earn_totals;
    by pik opeid deglevl_code degcip year_grad qtime_grad;
run;
                    
/* Reshape earn_totals from wide (one column per quarter) to long: one output
   row per quarter from qtime_grad through &endqtr. for every input row.
   Each output row carries:
     national_earnings / instate_earnings  - that quarter's summed earnings
     terminal_run                          - 1 if no quarter from this one to
                                             &endqtr. has positive in-state
                                             earnings, else 0
     *_sandwich_earnings                   - the quarter's earnings if both
                                             neighbouring quarters are
                                             non-missing, 0 if either is
                                             missing, and missing when a
                                             neighbour lies outside
                                             &startqtr..&endqtr.
   Fixes relative to the earlier version:
     - terminal_run is now computed for every input row (it used to be
       initialised only on first.degcip, so later rows in the same group
       came out missing instead of 0);
     - the sandwich test no longer reads array element &startqtr.-1;
     - at &endqtr. the sandwich values are set to missing instead of silently
       repeating the previous quarter's values. */
data earn_long_sandwich (keep=pik opeid degcip year_grad qtime_grad 
                       deglevl_code national_earnings 
                     instate_earnings instate_sandwich_earnings
                     national_sandwich_earnings qtime year quarter
                     ui_state terminal_run) ; 
    
    set earn_totals ;
    /* BY is kept so SAS still verifies the expected sort order */
    by pik opeid deglevl_code degcip ;            
                    
    array nat_e(&startqtr.:&endqtr.) nat_e&startqtr.-nat_e&endqtr. ;
    array state_e(&startqtr.:&endqtr.) state_e&startqtr.-state_e&endqtr. ;

    /* Single backward scan: last quarter at or after graduation with positive
       in-state earnings (stays missing when there is none). Every later
       quarter belongs to the terminal run of zeros/missings. */
    last_pos_qtr = . ;
    do jj = &endqtr. to qtime_grad by -1 while (last_pos_qtr = .) ;
        if state_e(jj) > 0 then last_pos_qtr = jj ;
    end;

    do ii = qtime_grad to &endqtr. ;
        national_earnings = nat_e(ii) ; 
        instate_earnings = state_e(ii) ;

        /**************************************
        Flag terminal runs of zeros in in-state
        earnings: nothing positive from ii onward
        ****************************************/
        terminal_run = (last_pos_qtr = . or ii > last_pos_qtr) ;

        /* Creating a sandwich estimator: needs both neighbours inside the
           array bounds */
        if &startqtr. < ii < &endqtr. then do; 
            if state_e(ii-1) = . or state_e(ii+1)= . then instate_sandwich_earnings = 0 ;
            else instate_sandwich_earnings = state_e(ii) ;

            if nat_e(ii-1) = . or nat_e(ii+1)= . then national_sandwich_earnings = 0 ;
            else national_sandwich_earnings = nat_e(ii) ;
        end;
        else do;
            /* a neighbour is unobserved, so the test cannot be evaluated */
            instate_sandwich_earnings = . ;
            national_sandwich_earnings = . ;
        end;

        qtime = ii ; 
        year=%inv_qtime(qtime,year,location=datastep) ;
        quarter=%inv_qtime(qtime,quarter,location=datastep) ;
        output ;
    end;
    
run;

/* View over ICF.icf_us that adds 0/1 indicator columns derived from the
   coded sex, race, pob and ethnicity fields. The meaning of each code value
   is taken from the comparisons below, not documented here. */
data icf (keep=pik dob male race white black asian hispanic
               pob us_native ethnicity)
         / view=icf;
    set ICF.icf_us;

    /* each comparison evaluates to 1 when true and 0 otherwise */
    male      = (sex = 'M');
    white     = (race = '1');
    black     = (race = '2');
    asian     = (race = '3');
    us_native = (pob = 'A');
    hispanic  = (ethnicity = 'H');
run;

/* Attach the icf demographic columns to the long earnings file by pik,
   keeping every earnings row and discarding piks that appear only in icf. */
data all_earnings_demog;
    merge earn_long_sandwich (in=has_earnings)
          icf;
    by pik;

    if not has_earnings then delete;
run;

/* Read the Stata file of institution identifiers and keep only the
   opeid and public columns. */
proc import
        out=opeid_public (keep=opeid public)
        datafile="&outpath./opeid_public.dta"
        dbms=dta
        replace;
run;

/* Order the institution lookup table by opeid (in place). */
proc sort data=opeid_public out=opeid_public;
    by opeid;
run;

/* Final analysis file: look up each row's opeid in opeid_public to fetch the
   `public` flag, then keep rows with public = 1 or ui_state = '42'.
   Fix: `public` comes from a SET statement and is therefore retained across
   iterations, so a failed lookup used to leave the previous row's value in
   place and the filter was applied to the wrong institution's flag. It is
   now reset to missing whenever the opeid is not found. */
data OUTPUTS.earn_long_sandwich ;

    /* never true at run time: this SET exists only so the compiler adds
       opeid and public to the PDV for the hash object */
    if _N_ = 0 then do; 
        set opeid_public (obs=0) ;
    end; 

    if _N_ = 1 then do ; 
        declare hash h_public (dataset:'WORK.opeid_public');
        rc = h_public.defineKey('opeid') ;
        rc = h_public.defineData('opeid','public') ;
        rc = h_public.defineDone();
    end;

    set all_earnings_demog ; 

    /* rc = 0 when the opeid is found; rc is left in the output as before */
    rc = h_public.find();
    if rc ne 0 then call missing(public) ;

    if public = 1 or ui_state = '42'; /* PSU gets classified as not a public school,
                               which is problematic since that's 
                               the whole state of PA for us.*/
run;

/* Put the final file in person-by-calendar-quarter order (in place). */
proc sort data=OUTPUTS.earn_long_sandwich
          out=OUTPUTS.earn_long_sandwich;
    by pik year quarter;
run;

        
/* Write the final file out as a Stata dataset. DBMS=DTA is stated explicitly
   (matching the PROC IMPORT above) rather than relying on the procedure to
   infer the format from the .dta file extension. */
proc export data=OUTPUTS.earn_long_sandwich
            outfile="&outstata/sandwich_earnings_test.dta"
            dbms=dta replace;
run;

/* outputting a quarterly dataset */